import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Read the house-rent listings into a DataFrame and preview the first rows.
# The location is an absolute, machine-specific Windows path; it is a raw
# string so the backslashes are taken literally.
DATASET_PATH = r"C:\Users\manav\Downloads\CSV's\House rent\House_Rent_Dataset.csv"
df = pd.read_csv(DATASET_PATH)
df.head()
| Posted On | BHK | Rent | Size | Floor | Area Type | Area Locality | City | Furnishing Status | Tenant Preferred | Bathroom | Point of Contact | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-05-18 | 2 | 10000 | 1100 | Ground out of 2 | Super Area | Bandel | Kolkata | Unfurnished | Bachelors/Family | 2 | Contact Owner |
| 1 | 2022-05-13 | 2 | 20000 | 800 | 1 out of 3 | Super Area | Phool Bagan, Kankurgachi | Kolkata | Semi-Furnished | Bachelors/Family | 1 | Contact Owner |
| 2 | 2022-05-16 | 2 | 17000 | 1000 | 1 out of 3 | Super Area | Salt Lake City Sector 2 | Kolkata | Semi-Furnished | Bachelors/Family | 1 | Contact Owner |
| 3 | 2022-07-04 | 2 | 10000 | 800 | 1 out of 2 | Super Area | Dumdum Park | Kolkata | Unfurnished | Bachelors/Family | 1 | Contact Owner |
| 4 | 2022-05-09 | 2 | 7500 | 850 | 1 out of 2 | Carpet Area | South Dum Dum | Kolkata | Unfurnished | Bachelors | 1 | Contact Owner |
# Print the frame summary: index range, per-column non-null counts and dtypes, memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4746 entries, 0 to 4745 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Posted On 4746 non-null object 1 BHK 4746 non-null int64 2 Rent 4746 non-null int64 3 Size 4746 non-null int64 4 Floor 4746 non-null object 5 Area Type 4746 non-null object 6 Area Locality 4746 non-null object 7 City 4746 non-null object 8 Furnishing Status 4746 non-null object 9 Tenant Preferred 4746 non-null object 10 Bathroom 4746 non-null int64 11 Point of Contact 4746 non-null object dtypes: int64(4), object(8) memory usage: 445.1+ KB
# Count missing values in every column.
df.isna().sum()
Posted On 0 BHK 0 Rent 0 Size 0 Floor 0 Area Type 0 Area Locality 0 City 0 Furnishing Status 0 Tenant Preferred 0 Bathroom 0 Point of Contact 0 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()
| BHK | Rent | Size | Bathroom | |
|---|---|---|---|---|
| count | 4746.000000 | 4.746000e+03 | 4746.000000 | 4746.000000 |
| mean | 2.083860 | 3.499345e+04 | 967.490729 | 1.965866 |
| std | 0.832256 | 7.810641e+04 | 634.202328 | 0.884532 |
| min | 1.000000 | 1.200000e+03 | 10.000000 | 1.000000 |
| 25% | 2.000000 | 1.000000e+04 | 550.000000 | 1.000000 |
| 50% | 2.000000 | 1.600000e+04 | 850.000000 | 2.000000 |
| 75% | 3.000000 | 3.300000e+04 | 1200.000000 | 2.000000 |
| max | 6.000000 | 3.500000e+06 | 8000.000000 | 10.000000 |
# Grid of pairwise plots over the DataFrame's variables, each facet drawn with height=4.
sns.pairplot(data=df, height=4)
<seaborn.axisgrid.PairGrid at 0x247565088e0>
# Rent distribution, first as a histogram ...
fig = px.histogram(
    data_frame=df,
    x='Rent',
    color_discrete_sequence=px.colors.qualitative.Set3,
    title='Rent Prices Distribution Histogram',
)
fig.show()

# ... then as a box plot, which makes extreme values easy to spot.
fig = px.box(
    data_frame=df,
    x='Rent',
    title='Boxplot For Rent Prices',
)
fig.show()
# Column-wise maximum of every column. For text columns this is the largest
# value in string ordering, not a meaningful "maximum".
max_value= df.max()
max_value
Posted On 2022-07-11 BHK 6 Rent 3500000 Size 8000 Floor Upper Basement out of 9 Area Type Super Area Area Locality whitefield City Mumbai Furnishing Status Unfurnished Tenant Preferred Family Bathroom 10 Point of Contact Contact Owner dtype: object
# Locate the listings whose rent exceeds 2,000,000. np.where returns a tuple
# holding one array of positional row numbers.
outlier_positions = np.where(df['Rent'] > 2000000)
print(outlier_positions)
(array([1837], dtype=int64),)
# Remove the extreme rent outlier by condition instead of by a hard-coded row
# label. On the data as loaded this drops the same single row (label 1837),
# but the cell can now be re-run without a KeyError and does not depend on
# the row order of the CSV. The frame is still modified in place.
df.drop(df.index[df['Rent'] > 2000000], inplace=True)
# Redraw the rent box plot now that the outlier row has been dropped.
fig = px.box(
    data_frame=df,
    x='Rent',
    title='Boxplot For Rent Prices',
)
fig.show()
# Number of listings for each BHK value, most frequent first.
df['BHK'].value_counts()
2 2265 1 1167 3 1097 4 189 5 19 6 8 Name: BHK, dtype: int64
# Bar chart of how many listings exist for each BHK value.
sns.set_style('whitegrid')
fig, axes = plt.subplots(figsize=(12, 8))
# NOTE(review): `colors` is assigned but never passed to the plot, and its
# order differs from the palette used below. Kept unchanged in case other
# cells read it; confirm which ordering was intended.
colors = ['#87ace8', '#e3784d', '#6ecc64', '#b644e3', '#eb7c87', '#EAE509']
ax = sns.countplot(
    x='BHK',
    data=df,
    palette=['#e3784d', '#87ace8', '#6ecc64', '#b644e3', '#eb7c87', '#EAE509'],
)
# Write the count on top of each bar; the call has to be indented so it is
# the body of the loop.
for container in ax.containers:
    ax.bar_label(container)
plt.title('Frequency of different number of BHKs present in Houses available for Rent', fontsize=15)
plt.show()
# Share of listings per BHK value as a pie chart.
fig = px.pie(
    data_frame=df,
    names='BHK',
    height=450,
    width=450,
    color_discrete_sequence=px.colors.sequential.deep,
    title='Pie Chart for different number of BHKs present in Houses available for Rent',
)
# Enlarge the text drawn on the slices.
fig.update_traces(textfont_size=15)
fig.show()
# Number of listings for each bathroom count, most frequent first.
df['Bathroom'].value_counts()
2 2291 1 1474 3 748 4 156 5 60 6 12 7 3 10 1 Name: Bathroom, dtype: int64
# Bar chart of how many listings exist for each bathroom count.
sns.set_style("whitegrid")
fig, axes = plt.subplots(figsize=(15, 8))
# NOTE(review): `colors` is assigned but never passed to the plot, and its
# order differs from the palette used below. Kept unchanged in case other
# cells read it; confirm which ordering was intended.
colors = ['#87ace8', '#e3784d', '#6ecc64', '#b644e3', '#eb7c87', '#EAE509', '#fab1f3', '#86f7d7']
ax = sns.countplot(
    x='Bathroom',
    data=df,
    palette=['#e3784d', '#87ace8', '#6ecc64', '#b644e3', '#eb7c87', '#fab1f3', '#86f7d7', '#EAE509'],
)
# Write the count on top of each bar; the call has to be indented so it is
# the body of the loop.
for container in ax.containers:
    ax.bar_label(container)
plt.title('Frequency of different number of Bathrooms present in Houses available for Rent', fontsize=15)
plt.show()
# Share of listings per bathroom count as a pie chart.
fig = px.pie(
    data_frame=df,
    names='Bathroom',
    height=450,
    width=650,
    color_discrete_sequence=px.colors.sequential.deep,
    title='Pie Chart for different number of Bathrooms present in Houses available for Rent',
)
# Enlarge the text drawn on the slices.
fig.update_traces(textfont_size=15)
fig.show()
# Number of listings per city, most frequent first.
df['City'].value_counts()
Mumbai 972 Chennai 891 Bangalore 885 Hyderabad 868 Delhi 605 Kolkata 524 Name: City, dtype: int64
# Bar chart of how many listings each city has.
sns.set_style('whitegrid')
fig, axes = plt.subplots(figsize=(12, 8))
# NOTE(review): `colors` is assigned but never passed to the plot, and its
# order differs from the palette used below. Kept unchanged in case other
# cells read it; confirm which ordering was intended.
colors = ['#87ace8', '#e3784d', '#6ecc64', '#b644e3', '#eb7c87', '#EAE509']
ax = sns.countplot(
    x='City',
    data=df,
    palette=['#EAE509', '#87ace8', '#6ecc64', '#eb7c87', '#e3784d', '#b644e3'],
)
# Write the count on top of each bar; the call has to be indented so it is
# the body of the loop.
for container in ax.containers:
    ax.bar_label(container)
plt.title('City wise Houses available for Rent', fontsize=15)
plt.show()
# Share of listings per city as a pie chart.
fig = px.pie(
    data_frame=df,
    names='City',
    height=450,
    width=650,
    color_discrete_sequence=px.colors.sequential.Sunsetdark,
    title='Pie Chart for houses available for rent in different cities',
)
# Enlarge the text drawn on the slices.
fig.update_traces(textfont_size=15)
fig.show()
# Number of listings per area type, most frequent first.
df['Area Type'].value_counts()
Super Area 2446 Carpet Area 2297 Built Area 2 Name: Area Type, dtype: int64
# Bar chart of how many listings fall under each area type.
colors = ['#87ace8', '#6ecc64', '#EAE509']
sns.set_style('whitegrid')
fig, axes = plt.subplots(figsize=(12, 8))
# Reuse `colors` rather than repeating the same literal list.
ax = sns.countplot(x='Area Type', data=df, palette=colors)
# Write the count on top of each bar; the call has to be indented so it is
# the body of the loop.
for container in ax.containers:
    ax.bar_label(container)
plt.title('Area wise rented houses distribution', fontsize=15)
plt.show()
# Number of listings per point of contact, most frequent first.
df['Point of Contact'].value_counts()
Contact Owner 3216 Contact Agent 1528 Contact Builder 1 Name: Point of Contact, dtype: int64
# Bar chart of how many listings use each point of contact.
sns.set_style('whitegrid')
fig, axes = plt.subplots(figsize=(10, 5))
colors = ['#885122', '#7f557e', '#EAE509']
# Reuse `colors` rather than repeating the same literal list.
ax = sns.countplot(x='Point of Contact', data=df, palette=colors)
# Write the count on top of each bar; the call has to be indented so it is
# the body of the loop.
for container in ax.containers:
    ax.bar_label(container)
plt.title('Point Of Contact Distribution For House Renting')
plt.show()
# Number of listings per preferred tenant type, most frequent first.
df['Tenant Preferred'].value_counts()
Bachelors/Family 3444 Bachelors 829 Family 472 Name: Tenant Preferred, dtype: int64
# Bar chart of how many listings prefer each tenant type.
sns.set_style('whitegrid')
fig, axes = plt.subplots(figsize=(12, 5))
# NOTE(review): `color` is assigned but never used; the plot takes its colours
# from the dark red palette below. Kept unchanged in case other cells read it.
color = ['lightblue', 'lightgreen', 'yellow']
ax = sns.countplot(
    x='Tenant Preferred',
    data=df,
    palette=sns.dark_palette("red", n_colors=3),
)
# Write the count on top of each bar; the call has to be indented so it is
# the body of the loop.
for container in ax.containers:
    ax.bar_label(container)
plt.title('Tenant Preferred Distribution for House Renting', fontsize=15)
plt.show()
# Number of listings per furnishing status, most frequent first.
df['Furnishing Status'].value_counts()
Semi-Furnished 2250 Unfurnished 1815 Furnished 680 Name: Furnishing Status, dtype: int64
# Bar chart of how many listings have each furnishing status.
sns.set_style('whitegrid')
fig, axes = plt.subplots(figsize=(12, 8))
colors = sns.color_palette('Set1')
# Reuse `colors` rather than building the same Set1 palette a second time.
ax = sns.countplot(x='Furnishing Status', data=df, palette=colors)
# Write the count on top of each bar; the call has to be indented so it is
# the body of the loop.
for container in ax.containers:
    ax.bar_label(container)
plt.title('Furnishing Status Distribution for house renting', fontsize=15)
plt.show()
# Number of listings for each distinct Size value, most frequent first.
df['Size'].value_counts()
1000 240
600 225
800 220
1200 193
500 192
...
2770 1
1252 1
904 1
721 1
855 1
Name: Size, Length: 615, dtype: int64
# Size distribution, first as a histogram ...
fig = px.histogram(
    data_frame=df,
    x='Size',
    title='Size distribution',
)
fig.show()

# ... then as a box plot.
fig = px.box(
    data_frame=df,
    x='Size',
    title='Boxplot for Size',
)
fig.show()
# One jittered point per listing: rent on the y-axis, grouped by city on the x-axis.
fig, ax = plt.subplots(figsize=(15, 10))
sns.stripplot(data=df, x="City", y='Rent', jitter=True, ax=ax)
ax.set_title('Scattered Rent Distribution by City', fontsize=15)
# Tilt the city names on the x-axis by 45 degrees.
plt.xticks(rotation=45)
plt.show()
# Sunburst chart whose hierarchy runs, from the centre outwards:
# City -> Area Type -> Furnishing Status -> Tenant Preferred.
fig = px.sunburst(
    data_frame=df,
    path=['City', 'Area Type', 'Furnishing Status', 'Tenant Preferred'],
    width=900,
    height=900,
    title='Allotment of flats according to Bachelors/Family/(Bachelors/Family)',
    color_discrete_sequence=px.colors.cyclical.Phase,
)
fig.show()
## Dropping unnecessary columns from the dataset
# Build a reduced copy of the data without the posting date, locality and
# floor columns; `df` itself is left untouched.
dropped_columns = ['Posted On', 'Area Locality', 'Floor']
rent_data = df.drop(columns=dropped_columns)
rent_data.head()
| BHK | Rent | Size | Area Type | City | Furnishing Status | Tenant Preferred | Bathroom | Point of Contact | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 10000 | 1100 | Super Area | Kolkata | Unfurnished | Bachelors/Family | 2 | Contact Owner |
| 1 | 2 | 20000 | 800 | Super Area | Kolkata | Semi-Furnished | Bachelors/Family | 1 | Contact Owner |
| 2 | 2 | 17000 | 1000 | Super Area | Kolkata | Semi-Furnished | Bachelors/Family | 1 | Contact Owner |
| 3 | 2 | 10000 | 800 | Super Area | Kolkata | Unfurnished | Bachelors/Family | 1 | Contact Owner |
| 4 | 2 | 7500 | 850 | Carpet Area | Kolkata | Unfurnished | Bachelors | 1 | Contact Owner |
# IPython shell escape (`!` is notebook syntax, not plain Python): runs pip to
# install the nbconvert package.
!pip install nbconvert
Requirement already satisfied: nbconvert in c:\users\manav\anaconda3\lib\site-packages (6.1.0) Requirement already satisfied: testpath in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (0.5.0) Requirement already satisfied: bleach in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (4.0.0) Requirement already satisfied: jupyterlab-pygments in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (0.1.2) Requirement already satisfied: pandocfilters>=1.4.1 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (1.4.3) Requirement already satisfied: defusedxml in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (0.7.1) Requirement already satisfied: entrypoints>=0.2.2 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (0.3) Requirement already satisfied: pygments>=2.4.1 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (2.10.0) Requirement already satisfied: jupyter-core in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (4.8.1) Requirement already satisfied: jinja2>=2.4 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (3.1.3) Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (0.5.3) Requirement already satisfied: traitlets>=5.0 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (5.1.0) Requirement already satisfied: mistune<2,>=0.8.1 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (0.8.4) Requirement already satisfied: nbformat>=4.4 in c:\users\manav\anaconda3\lib\site-packages (from nbconvert) (5.1.3) Requirement already satisfied: MarkupSafe>=2.0 in c:\users\manav\anaconda3\lib\site-packages (from jinja2>=2.4->nbconvert) (2.1.5) Requirement already satisfied: jupyter-client>=6.1.5 in c:\users\manav\anaconda3\lib\site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert) (6.1.12) Requirement already satisfied: async-generator in c:\users\manav\anaconda3\lib\site-packages (from 
nbclient<0.6.0,>=0.5.0->nbconvert) (1.10) Requirement already satisfied: nest-asyncio in c:\users\manav\anaconda3\lib\site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert) (1.5.1) Requirement already satisfied: python-dateutil>=2.1 in c:\users\manav\anaconda3\lib\site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert) (2.8.2) Requirement already satisfied: pyzmq>=13 in c:\users\manav\anaconda3\lib\site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert) (22.2.1) Requirement already satisfied: tornado>=4.1 in c:\users\manav\anaconda3\lib\site-packages (from jupyter-client>=6.1.5->nbclient<0.6.0,>=0.5.0->nbconvert) (6.1) Requirement already satisfied: pywin32>=1.0 in c:\users\manav\anaconda3\lib\site-packages (from jupyter-core->nbconvert) (228) Requirement already satisfied: ipython-genutils in c:\users\manav\anaconda3\lib\site-packages (from nbformat>=4.4->nbconvert) (0.2.0) Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in c:\users\manav\anaconda3\lib\site-packages (from nbformat>=4.4->nbconvert) (3.2.0) Requirement already satisfied: six>=1.11.0 in c:\users\manav\anaconda3\lib\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (1.16.0) Requirement already satisfied: attrs>=17.4.0 in c:\users\manav\anaconda3\lib\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (21.2.0) Requirement already satisfied: pyrsistent>=0.14.0 in c:\users\manav\anaconda3\lib\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (0.18.0) Requirement already satisfied: setuptools in c:\users\manav\anaconda3\lib\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.4->nbconvert) (58.0.4) Requirement already satisfied: packaging in c:\users\manav\anaconda3\lib\site-packages (from bleach->nbconvert) (21.0) Requirement already satisfied: webencodings in c:\users\manav\anaconda3\lib\site-packages (from bleach->nbconvert) (0.5.1) Requirement already satisfied: pyparsing>=2.0.2 in 
c:\users\manav\anaconda3\lib\site-packages (from packaging->bleach->nbconvert) (3.0.4)